In [1]:
import numpy as np
from sklearn.datasets import make_regression
from scipy.spatial.distance import norm
from itertools import product
from collections import OrderedDict
from plotly.graph_objs import *
import plotly.tools as tls
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode()
import time

from plot_helpers import *

X, y = make_regression(n_samples = int(1e5), n_features = 2, n_informative=2, random_state=0, noise=10)
X = (X - X.mean(axis=0))/X.std()
In [2]:
def ols_cost_function(X, y, params):
    '''
    OLS from linear regression
    '''
    n_observations = X.shape[0]
    avg_squared_residuals = ((predict(X, params) - y)**2).sum()/(2*n_observations)
    return avg_squared_residuals

def ols_gradient_of_cost_function(X, y, params):
    n_observations = X.shape[0]
    gradient = (predict(X, params) - y).dot(X)/n_observations
    return gradient
In [3]:
gd_param_history, gd_time_history = gradient_descent(X, y, ols_cost_function, ols_gradient_of_cost_function,
                initial_guess = np.array([0., 0.]))
Final gradient of cost function [-0.00079784 -0.00043608]
Final params [ 98.08097904  55.50870141]
In [4]:
# SGD: LINEAR REGRESSION
sgd_param_history, sgd_time_history = stochastic_gradient_descent(X, y, ols_cost_function, 
                                                ols_gradient_of_cost_function,
                                                initial_guess=np.array([0., 0.]),
                                                learning_rate=.1)
figure_3d = plot_sgd_results(X, y, ols_cost_function, sgd_param_history)
iplot(figure_3d)
max iterations reached
Final gradient of cost function [-1.14407406 -0.34370507]
Final params [ 96.9392301   55.16887573]
In [5]:
# MINIBATCH SGD: LINEAR REGRESSION
minibatch_param_history, minibatch_time_history = stochastic_gradient_descent(X, y, ols_cost_function, 
                                                ols_gradient_of_cost_function,
                                                initial_guess=np.array([0., 0.]),
                                                learning_rate=.1, batch_size=5)
figure_3d = plot_sgd_results(X, y, ols_cost_function, minibatch_param_history)
iplot(figure_3d)
max iterations reached
Final gradient of cost function [ 0.65890068 -0.45611064]
Final params [ 98.74185041  55.05075632]
In [6]:
# PLOT GD, SGD, and minibatch SGD convergence on log scale
gd_convergence = Scatter(
    x = [i for i,j in enumerate(gd_param_history)],
    y = [ols_cost_function(X, y, (p[0], p[1])) for p in gd_param_history],
    mode = 'lines',
    name = 'Batch Gradient Descent'
)

sgd_convergence = Scatter(
    x = [i for i,j in enumerate(sgd_param_history)],
    y = [ols_cost_function(X, y, (p[0], p[1])) for p in sgd_param_history],
    mode = 'lines',
    name = 'SGD'
)

minibatch_convergence = Scatter(
    x = [i for i,j in enumerate(minibatch_param_history)],
    y = [ols_cost_function(X, y, (p[0], p[1])) for p in minibatch_param_history],
    mode = 'lines',
    name = 'Mini-batch SGD'
)

layout = Layout(
    xaxis=XAxis(
        range=[0,150],
        title='steps'
    ),
    yaxis=YAxis(
        type='log',
        autorange=True,
        title='cost'
    )
)
data = Data([gd_convergence, sgd_convergence, minibatch_convergence])
figure = Figure(data=data, layout=layout)
iplot(figure)
In [7]:
# PLOT GD, SGD, and minibatch SGD convergence on log scale
gd_convergence = Scatter(
    x = [t - gd_time_history[0] for t in gd_time_history],
    y = [ols_cost_function(X, y, (p[0], p[1])) for p in gd_param_history],
    mode = 'lines',
    name = 'Batch Gradient Descent'
)

sgd_convergence = Scatter(
    x = [t - sgd_time_history[0] for t in sgd_time_history],
    y = [ols_cost_function(X, y, (p[0], p[1])) for p in sgd_param_history],
    mode = 'lines',
    name = 'SGD'
)

minibatch_convergence = Scatter(
    x = [t - minibatch_time_history[0] for t in minibatch_time_history],
    y = [ols_cost_function(X, y, (p[0], p[1])) for p in minibatch_param_history],
    mode = 'lines',
    name = 'Mini-batch SGD'
)

layout = Layout(
    xaxis=XAxis(
        range=[0,1],
        title='time'
    ),
    yaxis=YAxis(
        type='log',
        autorange=True,
    )
)
data = Data([gd_convergence, sgd_convergence, minibatch_convergence])
figure = Figure(data=data, layout=layout)
iplot(figure)